import argparse
import os
import json
import ast
from prompt.aitzPrompt import AITZ_FOROSATLAS, AITZ_FORUITARS, AITZ_FORGUIR1, AITZ_OS_GENESIS_PROMPT, AITZHIGHACTIONPREDICTPROMPT_FOROSATLAS
import re, math
from tqdm import tqdm
import sys
sys.path.append("./")
from utils.schema.GUI_OWL.common import pil_to_base64, message_translate
from utils.logging_utils import setup_logger_to_stdout
from preprocess_base import BasePreProcess
from utils.utils_odyssey.parameters import Point, Direction

# Module-level logger used by every exporter method below.
# Presumably writes to stdout (per the helper's name) — see utils.logging_utils.
logger = setup_logger_to_stdout()

def parse_args(args=None, namespace=None):
    """Parse the command-line options of the dataset-to-JSON converter.

    Args:
        args: Optional list of argument strings.  ``None`` lets argparse
            read ``sys.argv[1:]``.
        namespace: Optional object that receives the parsed attributes.
            ``None`` lets argparse create a fresh namespace.

    Returns:
        The namespace carrying ``dataset_name``, ``dataset_type``,
        ``dataset_path``, ``model_name`` and ``save_path``.
    """
    parser = argparse.ArgumentParser(description='Origin Dataset To Json')
    parser.add_argument('--dataset_name', type=str, default="AITZ",
                        help='dataset name')
    parser.add_argument('--dataset_type', type=str, default='all_low', help='dataset type')
    parser.add_argument('--dataset_path', type=str, default="/data3/cpz/datasets/android_in_the_zoo",
                        help='dataset path')
    parser.add_argument('--model_name', type=str, default="Aguvis",
                        help='model name')
    parser.add_argument('--save_path', type=str, default="/Agent_ScanKit/datasets/json",
                        help='save path')
    # Forward both parameters so callers can parse an explicit argument list
    # instead of always falling back to sys.argv.
    return parser.parse_args(args, namespace)


  
class AITZPreProcess(BasePreProcess):
    def __init__(self, dataset_type, dataset_path, dataset_name, save_path, model_name):
        """Store the export configuration on the instance.

        ``dataset_path``, ``dataset_name``, ``save_path`` and ``model_name``
        are handed to the base-class constructor first; ``dataset_type`` is
        only kept here.
        """
        super().__init__(dataset_path, dataset_name, save_path, model_name)
        # Assigned left to right, i.e. in the same order as before.
        self.dataset_type, self.dataset_path = dataset_type, dataset_path
        self.dataset_name, self.model_name = dataset_name, model_name

    def OS_ATLAS(self):
        """Export the non-train AITZ splits as OS-Atlas style JSON records.

        Every step of every episode becomes one record: a deep copy of the
        template returned by ``super().OS_ATLAS()`` filled with the screenshot
        path, the prompt (low-level when ``'low'`` is in ``self.dataset_type``,
        high-level otherwise) and the ground-truth action string.  One file
        named ``<dataset_type>_<split>_<model_name>.json`` is written per
        split into ``self.save_path``.
        """
        from copy import deepcopy

        sample = super().OS_ATLAS()

        # Action types whose output string does not depend on the step content.
        fixed_actions = {6: "PRESS_HOME", 5: "PRESS_BACK", 7: "ENTER", 10: "COMPLETE"}

        def actionMapping(action):
            """Map one AITZ step dict to an OS-Atlas action string.

            Possible results: CLICK, SCROLL, TYPE, PRESS_HOME, PRESS_BACK,
            ENTER, COMPLETE.  Returns ``None`` after logging an error when the
            step cannot be mapped (unknown action type, or a scroll whose
            description names no direction).
            """
            action_type = action['result_action_type']
            action_text = action['result_action_text']
            coat_action_desc = action['coat_action_desc'].lower()
            if action_type == 4:
                # Type 4 covers both taps and scrolls; only the textual
                # description tells them apart.
                if coat_action_desc.startswith("scroll"):
                    for direction in ("up", "down", "left", "right"):
                        if direction in coat_action_desc:
                            return f"SCROLL [{direction.upper()}]"
                else:
                    click = ast.literal_eval(action['result_touch_yx'])
                    # The stored pair is unpacked as (y, x) and scaled by 1000.
                    # NOTE(review): the values stay floats here, whereas
                    # UI_TARS/Agent_CPM truncate with int() — confirm intended.
                    y, x = click[0]*1000, click[1]*1000
                    return f"CLICK <point>[[{x}, {y}]]</point>"
            elif action_type == 3:
                return f"TYPE [{action_text}]"
            elif action_type in fixed_actions:
                return fixed_actions[action_type]
            logger.error(f"Action mapping error: {action}")
            return None

        path = self._merge_dataset_path()
        for key in path:
            if key == 'train':
                continue
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for path_item in path[key]:
                logger.info(f"Processing: {path_item}")
                for episode in path[key][path_item]:
                    episode_name = str(episode.split('/')[-1])
                    episode_json = os.path.join(episode, episode_name+".json")
                    metadata = self.readJson(episode_json)
                    previous_action_history = []

                    for step in metadata:
                        step_id = step['step_id']
                        record = deepcopy(sample)
                        record["episode_id"] = step["episode_id"]
                        record["step_id"] = step_id
                        record["images"] = [os.path.join(episode, episode_name+"_"+str(step_id)+'.png')]
                        record['accessibility_trees'] = episode_json
                        record['goal'] = step['instruction']
                        image_size = self.readImage(record['images'][0])
                        record['image_size'] = [[image_size[0], image_size[1]]]
                        try:
                            mapped_action = actionMapping(step)
                        except Exception as e:
                            logger.info(f'extract action failure: {e}')
                            mapped_action = None
                        # An unmappable step keeps the template's own content
                        # instead of storing None, which used to make the
                        # label concatenation below raise TypeError.
                        if mapped_action is not None:
                            record['messages'][1]['content'] = mapped_action
                        record['label'] = f"action:\n{record['messages'][1]['content']}"
                        # The history holds exactly the descriptions of the
                        # steps before this one, so it is passed unsliced.
                        if 'low' in self.dataset_type:
                            record['messages'][0]['content'] = AITZ_FOROSATLAS.format(
                                finalGoal=step['instruction'],
                                actionDesc=step['coat_action_desc'],
                                SD=step['coat_screen_desc'],
                                previousActions=previous_action_history,
                            )
                        else:
                            record['messages'][0]['content'] = AITZHIGHACTIONPREDICTPROMPT_FOROSATLAS.format(
                                finalGoal=step['instruction'],
                                SD=step['coat_screen_desc'],
                                previousActions=previous_action_history,
                            )
                        previous_action_history.append(step['coat_action_desc'])
                        data.append(record)

            os.makedirs(self.save_path, exist_ok=True)
            self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfully")
        logger.info("Finished")

    def UI_TARS(self):
        """Export the non-train AITZ splits as UI-TARS style chat records.

        Each record starts from a deep copy of the template returned by
        ``super().UI_TARS()``, receives the task prompt, the chat turns of up
        to four preceding steps, and the current screenshot.  When ``'low'``
        is in ``self.dataset_type`` an assistant turn pre-filled with the
        step's thought is appended as well.  One file named
        ``<dataset_type>_<split>_<model_name>.json`` is written per split into
        ``self.save_path``.
        """
        from copy import deepcopy

        sample = super().UI_TARS()

        # Action types whose output string does not depend on the step content.
        fixed_actions = {6: "press_home()", 5: "press_back()", 10: "finished()", 7: "enter()"}
        # Described direction -> emitted direction.  The emitted value is the
        # opposite of the described one.
        # NOTE(review): presumably gesture direction vs. content direction — confirm.
        scroll_directions = (("up", "down"), ("down", "up"), ("left", "right"), ("right", "left"))

        def actionMapping(action, image_size):
            """Map one AITZ step dict to a UI-TARS action call string.

            ``image_size`` is indexed as ``[0]`` for x and ``[1]`` for y when
            absolute coordinates are produced.  Returns ``None`` after logging
            an error when the step cannot be mapped.
            """
            action_type = action['result_action_type']
            action_text = action['result_action_text']
            coat_action_desc = action['coat_action_desc'].lower()
            if action_type == 4:
                # Type 4 covers both taps and scrolls; only the textual
                # description tells them apart.
                if coat_action_desc.startswith("scroll"):
                    for keyword, direction in scroll_directions:
                        if keyword in coat_action_desc:
                            return f"scroll(direction='{direction}')"
                else:
                    click = ast.literal_eval(action['result_touch_yx'])
                    # UI-TARS-1.5 uses absolute coordinates; other versions
                    # get the pair scaled by 1000.
                    if "1.5" in self.model_name:
                        y, x = int(click[0]*image_size[1]), int(click[1]*image_size[0])
                    else:
                        y, x = int(click[0]*1000), int(click[1]*1000)
                    return f"click(start_box='({x},{y})')"
            elif action_type == 3:
                return f"type(content='{action_text}')"
            elif action_type in fixed_actions:
                return fixed_actions[action_type]
            logger.error(f"Action mapping error: {action}")
            return None

        def build_history(index, new_metadata):
            """Return the chat turns of up to four steps preceding ``index``.

            Each earlier step contributes a user turn with its screenshot and
            an assistant turn with its thought and action.  Only the passed-in
            ``new_metadata`` is read.
            """
            history = []
            for i in range(max(0, index - 4), index):
                history.append({
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "image": new_metadata["screenshots"][i]
                        }
                    ]
                })
                action = new_metadata["action_traslate"][i]
                thought = new_metadata["step_instruction"][i]
                history.append({
                    "role": "assistant",
                    "content": [
                        {"type": "text", "text": f"Thought: {thought}\nAction: {action}"}
                    ]
                })
            return history

        path = self._merge_dataset_path()
        for key in path:
            if key == 'train':
                continue
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for path_item in path[key]:
                logger.info(f"Processing: {path_item}")
                for episode in tqdm(path[key][path_item]):
                    episode_json = os.path.join(episode, str(episode.split('/')[-1])+".json")
                    metadata = self.readJson(episode_json)
                    new_meta_data = {}
                    new_meta_data['screenshots'] = [os.path.join(episode, item['image_path'].split('/')[-1]) for item in metadata]
                    new_meta_data['image_size'] = [self.readImage(shot) for shot in new_meta_data['screenshots']]
                    new_meta_data["action_traslate"] = [
                        actionMapping(step, size)
                        for step, size in zip(metadata, new_meta_data['image_size'])
                    ]
                    new_meta_data["step_instruction"] = [step['coat_action_desc'] for step in metadata]

                    for i, step in enumerate(metadata):
                        screenshot = new_meta_data['screenshots'][i]
                        thought = new_meta_data['step_instruction'][i]
                        record = deepcopy(sample)
                        record['messages'][1]['content'][0]['text'] = AITZ_FORUITARS.format(
                            instruction=step['instruction']
                        )
                        # Empty for the first step, so no special case is needed.
                        record['messages'].extend(build_history(i, new_meta_data))
                        if 'low' in self.dataset_type:
                            record['messages'].extend([
                                {
                                    "role": "user",
                                    "content": [{
                                        "type": "image",
                                        "image": screenshot
                                    }]
                                },
                                {
                                    "role": "assistant",
                                    "content": [{
                                        "type": "text",
                                        "text": f"Thought: {thought}\n"
                                    }]
                                }
                            ])
                        else:
                            record['messages'].extend([
                                {
                                    "role": "user",
                                    "content": [
                                        {
                                            "type": "image",
                                            # NOTE(review): this prefix rewrite names an
                                            # android_control directory; it looks like a
                                            # leftover from another dataset — confirm.
                                            "image": screenshot.replace(
                                                "android_control_parsed_data_fixed/",
                                                "/data/cpz/datasets/android_control_parsed/"
                                            )
                                        }
                                    ]
                                }
                            ])
                        record['accessibility_trees'] = episode_json
                        record['label'] = f"Thought: {thought}\nAction: {new_meta_data['action_traslate'][i]}"
                        record["episode_id"] = step["episode_id"]
                        record["step_id"] = step['step_id']
                        record["images"] = [screenshot]
                        record['goal'] = step['instruction']
                        record['image_size'] = [new_meta_data['image_size'][i]]
                        data.append(record)

            os.makedirs(self.save_path, exist_ok=True)
            self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")

    def GUI_R1(self):
        """Export every AITZ split as GUI-R1 style JSON records.

        Each record starts from a deep copy of the template returned by
        ``super().GUI_R1()`` and receives the screenshot, a prompt built from
        ``AITZ_FORGUIR1`` (the step description as ``{goal}``, the
        comma-joined earlier descriptions as ``{history}``) and a label of the
        form ``<think></think><answer>…</answer>``.  One file named
        ``<dataset_type>_<split>_<model_name>.json`` is written per split into
        ``self.save_path``.

        NOTE(review): unlike OS_ATLAS/UI_TARS the prompt does not depend on
        ``self.dataset_type`` and the 'train' split is not skipped — confirm.
        """
        from copy import deepcopy

        sample = super().GUI_R1()

        # Action types that map to a bare action name.
        fixed_names = {6: 'press_home', 5: 'press_back', 7: 'enter', 10: 'complete'}

        def actionMapping(action):
            """Return ``str([{'action', 'point', 'input_text'}])`` for one step.

            Every action except a click carries the placeholder point
            ``[-100, -100]``; every action without text carries
            ``'no input text'``.  Unknown action types and scrolls without a
            recognisable direction are logged and fall back to these defaults
            rather than raising ``UnboundLocalError``.
            """
            t = action['result_action_type']
            action_name = fixed_names.get(t, t)
            input_text = 'no input text'
            point = [-100, -100]
            if t == 3:
                action_name = 'type'
                input_text = action['result_action_text']
            elif t == 4:
                # Type 4 covers both taps and scrolls; the description decides.
                # Matched case-insensitively, as OS_ATLAS and UI_TARS do.
                desc = action['coat_action_desc'].lower()
                if desc.startswith("scroll"):
                    action_name = 'scroll'
                    direction = next(
                        (name for name in ("up", "down", "left", "right") if name in desc),
                        None,
                    )
                    if direction is None:
                        logger.error(f"Action mapping error: {action}")
                    else:
                        input_text = direction
                else:
                    click = ast.literal_eval(action['result_touch_yx'])
                    image_size = action['image_size']
                    # The stored pair is unpacked as (y, x) and scaled to
                    # pixels with image_size[1] / image_size[0] respectively.
                    y, x = int(click[0]*image_size[1]), int(click[1]*image_size[0])
                    point = [x, y]
                    action_name = 'click'
            elif t not in fixed_names:
                logger.error(f"Action mapping error: {action}")
            formatted_action = [{
                'action': action_name,
                'point': point,
                'input_text': input_text
            }]
            return str(formatted_action)

        def attach_image_size(step, episode):
            """Return a copy of ``step`` with an added ``image_size`` entry.

            The size is read from the file named by the components of
            ``image_path`` after the first two; ``None`` when the step has no
            ``image_path``.
            """
            if "image_path" in step:
                file_name = ''.join(step["image_path"].split("/")[2:])
                size = self.readImage(os.path.join(episode, file_name))
            else:
                size = None
            return step | {"image_size": size}

        path = self._merge_dataset_path()
        for key in path:
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for path_item in path[key]:
                logger.info(f"Processing: {path_item}")
                for episode in path[key][path_item]:
                    episode_name = str(episode.split('/')[-1])
                    episode_json = os.path.join(episode, episode_name+".json")
                    metadata = [attach_image_size(step, episode) for step in self.readJson(episode_json)]
                    action_traslate = [actionMapping(step) for step in metadata]
                    previous_action_history = []

                    for step, action_label in zip(metadata, action_traslate):
                        record = deepcopy(sample)
                        record["episode_id"] = step["episode_id"]
                        record["step_id"] = step["step_id"]
                        record["images"] = [os.path.join(episode, episode_name+"_"+str(step["step_id"])+'.png')]
                        record['accessibility_trees'] = episode_json
                        record['goal'] = step['instruction']
                        record['low_level_goal'] = step['coat_action_desc']
                        record['image_size'] = [[step['image_size'][0], step['image_size'][1]]]
                        record['label'] = "<think></think><answer>"+action_label+"</answer>"
                        record['messages'][0]['content'][0]['image'] = record['images'][0]
                        record['messages'][0]['content'][1]['text'] = '<image>\n' + AITZ_FORGUIR1.replace("{goal}", step['coat_action_desc']).replace("{history}", ','.join(previous_action_history))
                        previous_action_history.append(step['coat_action_desc'])
                        data.append(record)

            os.makedirs(self.save_path, exist_ok=True)
            self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfully")
        logger.info("Finished")
                        
    def Agent_CPM(self):
        """Export every AITZ split as Agent-CPM style JSON records.

        Each record starts from a deep copy of the template returned by
        ``super().Agent_CPM()``.  The ``text_prompt`` placeholder of its first
        message is replaced by the step description when ``'low'`` is in
        ``self.dataset_type`` and by the episode instruction otherwise.  The
        system prompt embeds the action schema loaded from
        ``agentCPMSchema.json`` with a ``required: ["thought"]`` entry
        inserted at position 3.  One file named
        ``<dataset_type>_<split>_<model_name>.json`` is written per split into
        ``self.save_path``.
        """
        from copy import deepcopy
        from prompt.aitzPrompt import AITZ_AGENT_CPM_SYSTEM_PROMPT

        sample = super().Agent_CPM()

        # Action types whose payload does not depend on the step content.
        fixed_actions = {
            6: {"thought": "", "PRESS": "HOME"},
            5: {"thought": "", "PRESS": "BACK"},
            10: {"thought": "", 'STATUS': 'finish'},
            7: {"thought": "", 'PRESS': 'ENTER'},
        }

        def actionMapping(action):
            """Return the ``str()`` of the Agent-CPM action dict for one step.

            Returns the literal ``"error"`` (after logging) for unknown action
            types and for scrolls whose description names no direction; the
            latter used to raise ``UnboundLocalError``.
            """
            t = action['result_action_type']
            if t == 4:
                # Type 4 covers both taps and scrolls; the description decides.
                # Matched case-insensitively, as OS_ATLAS and UI_TARS do.
                desc = action['coat_action_desc'].lower()
                if desc.startswith("scroll"):
                    for direction in ("up", "down", "left", "right"):
                        if direction in desc:
                            return str({"thought": "", "POINT": [-100, -100], "to": direction})
                else:
                    click = ast.literal_eval(action['result_touch_yx'])
                    # The stored pair is unpacked as (y, x) and scaled by 1000.
                    y, x = int(click[0]*1000), int(click[1]*1000)
                    return str({"thought": "", "POINT": [x, y]})
            elif t == 3:
                return str({"thought": "", "TYPE": action['result_action_text']})
            elif t in fixed_actions:
                return str(fixed_actions[t])
            logger.error(f"Action mapping error: {action}")
            return "error"

        # Close the schema file deterministically instead of leaking the handle.
        with open('/Agent_ScanKit/utils/schema/agentCPMSchema.json', encoding="utf-8") as schema_file:
            action_schema = json.load(schema_file)
        schema_items = list(action_schema.items())
        schema_items.insert(3, ("required", ["thought"]))
        action_schema = dict(schema_items)
        system_prompt = AITZ_AGENT_CPM_SYSTEM_PROMPT.replace("ACTION_SCHEMA", str(action_schema))

        path = self._merge_dataset_path()
        for key in path:
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for path_item in path[key]:
                logger.info(f"Processing: {path_item}")
                for episode in path[key][path_item]:
                    episode_name = str(episode.split('/')[-1])
                    episode_json = os.path.join(episode, episode_name+".json")
                    # The action strings need no image size, so each
                    # screenshot is opened only once, for the record below.
                    metadata = self.readJson(episode_json)

                    for step in metadata:
                        record = deepcopy(sample)
                        record["episode_id"] = step["episode_id"]
                        record["step_id"] = step["step_id"]
                        record["images"] = [os.path.join(episode, episode_name+"_"+str(step["step_id"])+'.png')]
                        record['accessibility_trees'] = episode_json
                        record['goal'] = step['instruction']
                        image_size = self.readImage(record['images'][0])
                        record['image_size'] = [[image_size[0], image_size[1]]]
                        record['label'] = actionMapping(step)
                        if 'low' in self.dataset_type:
                            prompt_text = step['coat_action_desc']
                        else:
                            # The task text is read from 'instruction', the same
                            # key that fills record['goal'] above.
                            prompt_text = step['instruction']
                        record['messages'][0]['content'][0] = record['messages'][0]['content'][0].replace("text_prompt", prompt_text)
                        record['system_prompt'] = system_prompt
                        data.append(record)

            os.makedirs(self.save_path, exist_ok=True)
            self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")


    def Aguvis(self):
        """Export the non-train AITZ splits as Aguvis style JSON records.

        Each record starts from a deep copy of the template returned by
        ``super().Aguvis()``; its ``messages['content'][1]['text']`` is filled
        from ``user_instruction`` and the label is a ``pyautogui``/``mobile``
        call prefixed with ``assistantos``.  One file named
        ``<dataset_type>_<split>_<model_name>.json`` is written per split into
        ``self.save_path``.

        NOTE(review): the same ``user_instruction`` template is formatted with
        ``low_level_instruction`` in the low-level branch and without it in
        the high-level branch — confirm the template tolerates both.
        """
        from copy import deepcopy
        from utils.schema.aguvisConstants import user_instruction

        sample = super().Aguvis()

        # Described scroll direction -> emitted call.
        scroll_calls = (
            ("up", "assistantos\npyautogui.scroll(page=-0.1)"),
            ("down", "assistantos\npyautogui.scroll(page=0.1)"),
            ("left", "assistantos\npyautogui.hscroll(page=0.1)"),
            ("right", "assistantos\npyautogui.hscroll(page=-0.1)"),
        )
        # Action types whose output string does not depend on the step content.
        fixed_actions = {
            6: "assistantos\nmobile.home()",
            5: "assistantos\nmobile.back()",
            10: "assistantos\nmobile.terminate(status='success')",
            7: "assistantos\npyautogui.press(key=['enter'])",
        }

        def actionMapping(action):
            """Map one AITZ step dict to an Aguvis action string.

            Click coordinates are emitted exactly as stored, without scaling.
            Returns the literal ``"error"`` (after logging) for unknown action
            types and for scrolls whose description names no direction.
            """
            t = action['result_action_type']
            if t == 4:
                # Type 4 covers both taps and scrolls; the description decides.
                # Matched case-insensitively, as OS_ATLAS and UI_TARS do.
                desc = action['coat_action_desc'].lower()
                if desc.startswith("scroll"):
                    for keyword, call in scroll_calls:
                        if keyword in desc:
                            return call
                else:
                    click = ast.literal_eval(action['result_touch_yx'])
                    y, x = click[0], click[1]
                    return f"assistantos\npyautogui.click(x={x}, y={y})"
            elif t == 3:
                return f"assistantos\npyautogui.write(message='{action['result_action_text']}')"
            elif t in fixed_actions:
                return fixed_actions[t]
            logger.error(f"Action mapping error: {action}")
            return "error"

        path = self._merge_dataset_path()
        for key in path:
            if key == 'train':
                continue
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for path_item in path[key]:
                logger.info(f"Processing: {path_item}")
                for episode in path[key][path_item]:
                    episode_name = str(episode.split('/')[-1])
                    episode_json = os.path.join(episode, episode_name+".json")
                    # The action strings need no image size, so each
                    # screenshot is opened only once, for the record below.
                    metadata = self.readJson(episode_json)
                    previous_action_history = []

                    for step in metadata:
                        record = deepcopy(sample)
                        record["episode_id"] = step["episode_id"]
                        record["step_id"] = step["step_id"]
                        record["images"] = [os.path.join(episode, episode_name+"_"+str(step["step_id"])+'.png')]
                        record['accessibility_trees'] = episode_json
                        record['goal'] = step['instruction']
                        image_size = self.readImage(record['images'][0])
                        record['image_size'] = [[image_size[0], image_size[1]]]
                        record['label'] = actionMapping(step)
                        if 'low' in self.dataset_type:
                            record['messages']['content'][1]['text'] = user_instruction.format(
                                overall_goal=record['goal'],
                                previous_actions=previous_action_history,
                                low_level_instruction=step['coat_action_desc']
                            )
                            record['is_low_level_instruction'] = True
                            record['low_level_instruction'] = step['coat_action_desc']
                        else:
                            record['messages']['content'][1]['text'] = user_instruction.format(
                                overall_goal=record['goal'],
                                previous_actions=previous_action_history,
                            )
                            record['is_low_level_instruction'] = False
                        record['mode'] = 'force-plan'
                        previous_action_history.append(step['coat_action_desc'])
                        data.append(record)

            os.makedirs(self.save_path, exist_ok=True)
            self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")

    def OS_Genesis(self):
        """Export every AITZ split as OS-Genesis style JSON records.

        Each record starts from a deep copy of the template returned by
        ``super().OS_Genesis()`` and receives a ``question`` built from
        ``AITZ_OS_GENESIS_PROMPT`` (instruction, numbered step history, a
        label-to-centre mapping of the UI elements, the step description) and
        a label of the form ``Low-level thought: … action: {json}``.  One file
        named ``<dataset_type>_<split>_<model_name>.json`` is written per
        split into ``self.save_path``.
        """
        from copy import deepcopy

        sample = super().OS_Genesis()

        # Action types whose payload does not depend on the step content.
        fixed_actions = {6: "navigate_home", 5: "navigate_back", 7: "enter", 10: "stop"}

        def get_a11_tree(metadata):
            """Return ``{label: (center_x, center_y)}`` for one step's UI elements.

            The three ``ui_*`` fields are Python-literal strings.  Each
            position is unpacked as ``(y, x, h, w)``.  Elements typed ``TEXT``
            are keyed by their text, all others by their type, so a later
            element with the same key replaces an earlier one.
            """
            clickable_nodes = {}
            ui_positions = ast.literal_eval(metadata['ui_positions'])
            ui_text = ast.literal_eval(metadata['ui_text'])
            ui_type = ast.literal_eval(metadata['ui_types'])
            for i, item in enumerate(ui_positions):
                y, x, h, w = item[0], item[1], item[2], item[3]
                center = (x+w/2, y+h/2)
                label = ui_text[i] if ui_type[i] == 'TEXT' else ui_type[i]
                clickable_nodes[label] = center
            return clickable_nodes

        def actionMapping(action):
            """Map one AITZ step dict to an OS-Genesis label string.

            Returns the literal ``"error"`` (after logging) for unknown action
            types and for scrolls whose description names no direction.
            """
            t = action['result_action_type']
            thought = action['coat_action_desc']

            def render(payload):
                # json.dumps with default separators yields the same text as
                # the hand-written JSON snippets it replaces.
                return f"Low-level thought: {thought} action: {json.dumps(payload)}"

            if t == 4:
                # Type 4 covers both taps and scrolls; the description decides.
                # Matched case-insensitively, as OS_ATLAS and UI_TARS do; the
                # emitted thought keeps its original casing.
                desc = thought.lower()
                if desc.startswith("scroll"):
                    for direction in ("up", "down", "left", "right"):
                        if direction in desc:
                            return render({"action_type": "scroll", "direction": direction})
                else:
                    click = ast.literal_eval(action['result_touch_yx'])
                    image_size = action['image_size']
                    # The stored pair is unpacked as (y, x) and scaled to
                    # pixels with image_size[1] / image_size[0] respectively.
                    y, x = int(click[0]*image_size[1]), int(click[1]*image_size[0])
                    return render({"action_type": "click", "x": x, "y": y})
            elif t == 3:
                return render({
                    "action_type": "type",
                    "text": action["result_action_text"],
                    "x": -100,
                    "y": -100
                })
            elif t in fixed_actions:
                return render({"action_type": fixed_actions[t]})
            logger.error(f"Action mapping error: {action}")
            return "error"

        def attach_image_size(step, episode):
            """Return a copy of ``step`` with an added ``image_size`` entry.

            The size is read from the file named by the components of
            ``image_path`` after the first two; ``None`` when the step has no
            ``image_path``.
            """
            if "image_path" in step:
                file_name = ''.join(step["image_path"].split("/")[2:])
                size = self.readImage(os.path.join(episode, file_name))
            else:
                size = None
            return step | {"image_size": size}

        path = self._merge_dataset_path()
        for key in path:
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for path_item in path[key]:
                logger.info(f"Processing: {path_item}")
                for episode in path[key][path_item]:
                    episode_name = str(episode.split('/')[-1])
                    episode_json = os.path.join(episode, episode_name+".json")
                    metadata = [attach_image_size(step, episode) for step in self.readJson(episode_json)]
                    action_traslate = [actionMapping(step) for step in metadata]
                    previous_action_history = []

                    for i, step in enumerate(metadata):
                        record = deepcopy(sample)
                        record["episode_id"] = step["episode_id"]
                        record["step_id"] = step["step_id"]
                        record["images"] = [os.path.join(episode, episode_name+"_"+str(step["step_id"])+'.png')]
                        record['accessibility_trees'] = episode_json
                        record['goal'] = step['instruction']
                        record['image_size'] = [step['image_size']]
                        record['label'] = action_traslate[i]
                        record['question'] = AITZ_OS_GENESIS_PROMPT.format(
                            instruction=record['goal'],
                            history='\n'.join(previous_action_history),
                            a11y_tree=str(get_a11_tree(step)),
                            low_level_thought=step['coat_action_desc'],
                        )
                        record['low_level_instruction'] = step['coat_action_desc']
                        previous_action_history.append(f"Step {i}:"+str(step['coat_action_desc']))
                        data.append(record)

            os.makedirs(self.save_path, exist_ok=True)
            self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")

    def GUI_Odyssey(self):
        """Convert AITZ episodes into GUI-Odyssey style records and save them as JSON.

        The record template comes from ``super().GUI_Odyssey()`` and is
        deep-copied for every step. The ``train`` split is skipped. For each
        remaining split two files are written:

        * the records, to
          ``<save_path>/<dataset_type>_<split>_<model_name lower-cased>.json``;
        * a mapping from each step's screenshot path to the list of earlier
          screenshots of the same episode, to the fixed location
          ``/Agent_ScanKit/utils/utils_odyssey/his_index.json``.

        When ``"low"`` occurs in ``self.dataset_type`` the prompt is filled
        with the step's ``coat_action_desc``; otherwise with the episode
        ``instruction``.

        Raises:
            NotImplementedError: if a step has an action type that has no
                GUI-Odyssey command mapping.
        """
        # Imported once per call instead of once per step inside the loops.
        from copy import deepcopy

        sample = super().GUI_Odyssey()

        # Action types whose label is a constant command string.
        fixed_commands = {
            7: 'PRESS_ENTER',
            1: 'NO_ACTION',
            6: 'PRESS_HOME',
            5: 'PRESS_BACK',
            10: 'COMPLETE',
        }

        def transform_actions(action: dict) -> str:
            """Return the command-style label for one AITZ step dict."""
            action_type: int = action["result_action_type"]

            if action_type in fixed_commands:
                return fixed_commands[action_type]
            if action_type == 3:
                return f'TYPE: {action["result_action_text"]}'
            if action_type == 4:
                # Dual-point gesture: touch and lift points are stored as JSON
                # "[y, x]" strings; scale by 1000 and round before comparing.
                sy, sx = (round(v * 1000) for v in json.loads(action["result_touch_yx"]))
                ey, ex = (round(v * 1000) for v in json.loads(action["result_lift_yx"]))

                # Identical touch and lift points mean a click, anything else a scroll.
                if sx == ex and sy == ey:
                    return f'CLICK: ({ex}, {ey})'
                from utils.utils_odyssey.parameters import get_direction
                direction = get_direction({"x": sx, "y": sy}, {"x": ex, "y": ey})
                return f'SCROLL: {direction.upper()}'

            raise NotImplementedError(f"No matching type for type {action_type}")

        path = self._merge_dataset_path()
        for key in path:
            if key == 'train':
                continue
            data = []
            hit_index = {}
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for path_item in path[key]:
                logger.info(f"Processing: {path_item}")
                for episode in tqdm(path[key][path_item]):
                    episode_name = str(episode.split('/')[-1])
                    episode_json = os.path.join(episode, episode_name + ".json")
                    metadata = self.readJson(episode_json)
                    # Attach the screenshot size to every step that names an image.
                    metadata = [
                        step | {"image_size": self.readImage(os.path.join(episode, ''.join(step["image_path"].split("/")[2:]))) if "image_path" in step else None}
                        for step in metadata
                    ]
                    # Translate all actions up front so an unsupported action
                    # type aborts before any record of this episode is added.
                    action_traslate = [transform_actions(step) for step in metadata]
                    previous_action_history: list = []
                    previous_screenshot_history: list = []
                    for i, step in enumerate(metadata):
                        record = deepcopy(sample)
                        record["episode_id"] = step["episode_id"]
                        record["step_id"] = step["step_id"]
                        img = os.path.join(episode, episode_name + "_" + str(step["step_id"]) + '.png')
                        # Independent copies: the history list keeps growing below.
                        hit_index[img] = list(previous_screenshot_history)
                        record['history_screenshot'] = list(previous_screenshot_history)
                        record["images"] = [img]
                        record['accessibility_trees'] = episode_json
                        record['goal'] = step['instruction']
                        record['image_size'] = [step['image_size']]
                        record['label'] = action_traslate[i]

                        if "low" in self.dataset_type:
                            instruction = step['coat_action_desc']
                        else:
                            instruction = record['goal']
                        question = record['question'].format(
                            instruction=instruction,
                            image_path=record['images'][0]
                        )

                        if i > 0:
                            # The history tag carries the current image path, which is
                            # the key under which hit_index stores the earlier screenshots.
                            his_img = f'\nPrevious screenshots: <img>image-history: {img}</img>'
                            # Only the four most recent actions are listed.
                            his_str = '\nPrevious Actions: ' + ''.join(
                                f"{position}. {past_action}\n"
                                for position, past_action in enumerate(previous_action_history[-4:], start=1)
                            )
                            question = f"{question}{his_img}{his_str}"
                        else:
                            question += '\nPrevious screenshots: None'
                            question += '\nPrevious Actions: None'
                        question += '\nProvide the command-style action directly.'
                        record['question'] = question
                        record['low_level_instruction'] = step['coat_action_desc']
                        previous_action_history.append(action_traslate[i])
                        previous_screenshot_history.append(img)
                        data.append(record)

            os.makedirs(self.save_path, exist_ok=True)
            self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'))
            self.saveJson(hit_index, os.path.join("/Agent_ScanKit/utils/utils_odyssey", f"his_index.json"))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")


    def GUI_OWL(self):
        """Convert AITZ episodes into GUI-OWL style chat records and save them as JSON.

        ``super().GUI_OWL()`` supplies the system-message builder, the image
        resizer, the user-message builder and the record template (in that
        order). The ``train`` split is skipped; every other split is written
        to ``<save_path>/<dataset_type>_<split>_<model_name lower-cased>.json``.

        Each record's ``label`` is a GUI-OWL response whose ``<tool_call>``
        section holds a JSON ``mobile_use`` call. The JSON is produced with
        ``json.dumps`` so that typed text containing quotes or backslashes
        still yields valid JSON.

        When ``"low"`` occurs in ``self.dataset_type`` the user message is
        built from the step's ``coat_action_desc``; otherwise from the episode
        ``instruction``. The ``coat_action_desc`` values of earlier steps are
        passed as history in both cases.
        """
        # Imported once per call instead of once per step inside the loops.
        from copy import deepcopy

        build_system_messages, getResizedImage, build_user_messages, sample = super().GUI_OWL()

        # (keyword, start point, end point) checked in this order against the
        # scroll description. The 0/1 pairs are the fixed values this converter
        # has always emitted; presumably only the direction matters downstream
        # -- TODO confirm against the evaluator.
        swipe_endpoints = (
            ("up", [0, 0], [1, 0]),
            ("down", [0, 0], [0, 1]),
            ("left", [0, 1], [0, 0]),
            ("right", [1, 0], [0, 0]),
        )
        # Action types that map onto a system button press.
        system_buttons = {6: "Home", 5: "Back", 7: "Enter"}

        def tool_call(arguments):
            """Wrap ``mobile_use`` arguments in the GUI-OWL response template."""
            # ensure_ascii=False keeps non-ASCII text readable instead of \\u-escaping it.
            payload = json.dumps({"name": "mobile_use", "arguments": arguments}, ensure_ascii=False)
            return f'<thinking>\n""\n</thinking>\n<tool_call>\n{payload}\n</tool_call>\n<conclusion>\n""\n</conclusion>'

        def actionMapping(action):
            """
            ['CLICK', 'SCROLL', 'TYPE', 'PRESS_HOME', 'PRESS_BACK', 'ENTER']

            Return the GUI-OWL label for one AITZ step dict, or ``None`` (after
            logging an error) when the step cannot be mapped.
            """
            action_type = action['result_action_type']
            action_text = action['result_action_text']
            coat_action_desc = action['coat_action_desc'].lower()

            if action_type == 4:
                if coat_action_desc.startswith("scroll"):
                    for keyword, start, end in swipe_endpoints:
                        if keyword in coat_action_desc:
                            return tool_call({"action": "swipe", "coordinate": start, "coordinate2": end})
                    # No direction keyword found: report it instead of failing silently.
                    logger.error(f"Action mapping error: {action}")
                    return None
                # result_touch_yx is a "[y, x]" literal; coordinates are scaled by 1000.
                click = ast.literal_eval(action['result_touch_yx'])
                y, x = click[0]*1000, click[1]*1000
                return tool_call({"action": "click", "coordinate": [x, y]})
            if action_type in system_buttons:
                return tool_call({"action": "system_button", "button": system_buttons[action_type]})
            if action_type == 3:
                return tool_call({"action": "type", "text": action_text})
            if action_type == 10:
                return tool_call({"action": "terminate", "status": "success"})

            logger.error(f"Action mapping error: {action}")
            return None

        path = self._merge_dataset_path()
        for key in path:
            if key == 'train':
                continue
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for path_item in path[key]:
                logger.info(f"Processing: {path_item}")
                for episode in path[key][path_item]:
                    episode_name = str(episode.split('/')[-1])
                    episode_json = os.path.join(episode, episode_name + ".json")
                    metadata = self.readJson(episode_json)
                    previous_action_history = []
                    for step in metadata:
                        record = deepcopy(sample)
                        record["episode_id"] = step["episode_id"]
                        record["step_id"] = step["step_id"]
                        record["images"] = [os.path.join(episode, episode_name + "_" + str(step["step_id"]) + '.png')]
                        record['accessibility_trees'] = episode_json
                        record['goal'] = step['instruction']
                        image_size = self.readImage(record['images'][0])
                        record['image_size'] = [[image_size[0], image_size[1]]]
                        # Best effort: a step whose action cannot be parsed keeps the
                        # template's label and is still emitted.
                        try:
                            record['label'] = actionMapping(step)
                        except Exception as e:
                            logger.info(f'extract action failure: {e}')

                        # The system message is built from the resized image's dimensions.
                        dummy_image = getResizedImage(record['images'][0])
                        system_messages = build_system_messages(dummy_image.height, dummy_image.width)

                        if 'low' in self.dataset_type:
                            instruction = step['coat_action_desc']
                        else:
                            instruction = record['goal']
                        # Pass a copy: the history list keeps growing below.
                        user_messages = build_user_messages(instruction, enable_think=True, history=list(previous_action_history))
                        user_messages['content'].append({"image": record['images'][0]})
                        messages = [system_messages, user_messages]
                        record['messages'] = message_translate(messages, to_format='qwen')
                        previous_action_history.append(step['coat_action_desc'])
                        data.append(record)

            os.makedirs(self.save_path, exist_ok=True)
            self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfully")
        logger.info("Finished")


    def _merge_dataset_path(self):
        dataset_types = ["general", "google_apps", "install", "web_shopping"]
        dataset_dicts = {"train": {}, "test": {}}

        if 'all' in self.dataset_type:
            for dtype in dataset_types:
                for split in ["train", "test"]:
                    base_dir = os.path.join(self.dataset_path, split, dtype)
                    if os.path.exists(base_dir):
                        subfolders = [
                            os.path.join(base_dir, f)
                            for f in os.listdir(base_dir)
                            if os.path.isdir(os.path.join(base_dir, f))
                        ]
                        dataset_dicts[split][dtype] = subfolders
                    else:
                        dataset_dicts[split][dtype] = []
        else:
            for split in ["train", "test"]:
                base_dir = os.path.join(self.dataset_path, split, self.dataset_type)
                if os.path.exists(base_dir):
                    subfolders = [
                        os.path.join(base_dir, f)
                        for f in os.listdir(base_dir)
                        if os.path.isdir(os.path.join(base_dir, f))
                    ]
                    dataset_dicts[split][self.dataset_type] = subfolders
                else:
                    dataset_dicts[split][self.dataset_type] = []

        return dataset_dicts
            
        

            
    
if __name__ == '__main__':
    # Script entry point: parse the CLI options, build the AITZ pre-processor
    # and run the converter that belongs to the requested model.
    args = parse_args()
    logger.info(args)
    process = AITZPreProcess(args.dataset_type, args.dataset_path, args.dataset_name, args.save_path, args.model_name)

    # Model name -> name of the converter method on the pre-processor.
    # Names are resolved with getattr only after a match, so converters that
    # are not requested are never looked up.
    converter_by_model = {
        "OS_ATLAS": "OS_ATLAS",
        "UI_TARS": "UI_TARS",
        "UI_TARS_1.5": "UI_TARS",
        "GUI_R1": "GUI_R1",
        'Agent_CPM': "Agent_CPM",
        'Aguvis': "Aguvis",
        'OS_Genesis': "OS_Genesis",
        'GUI_Odyssey': "GUI_Odyssey",
        'GUI_OWL': "GUI_OWL",
    }
    converter_name = converter_by_model.get(args.model_name)
    if converter_name is None:
        # Unknown model names are only reported; nothing is converted.
        logger.info("error processing")
    else:
        getattr(process, converter_name)()
        
        

